\defmodule {Tally}

\latex{A subclass of \class{StatProbe}.}
This type of statistical collector takes a sequence of real-valued
observations\latex{ $X_1,X_2,X_3,\dots$} and can return the average,
the variance, a confidence interval for the theoretical mean, etc.
Each call to \method{add}{} provides a new observation.
When the broadcasting to observers is activated,
the method \method{add}{} will also pass this new information to its
registered observers.
This type of collector does not memorize the individual observations,
but only their number, sum, sum of squares, maximum, and minimum.
The subclass \class{TallyStore} offers a collector that memorizes
the observations.
%  The collector can be reinitialized by \method{init}{}.

\bigskip\hrule

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{code}
\begin{hide}
/*
 * Class:        Tally
 * Description:  statistical collector
 * Environment:  Java
 * Software:     SSJ
 * Copyright (C) 2001  Pierre L'Ecuyer and Universite de Montreal
 * Organization: DIRO, Universite de Montreal
 * @author
 * @since

 * SSJ is free software: you can redistribute it and/or modify it under
 * the terms of the GNU General Public License (GPL) as published by the
 * Free Software Foundation, either version 3 of the License, or
 * any later version.

 * SSJ is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.

 * A copy of the GNU General Public License is available at
   <a href="http://www.gnu.org/licenses">GPL licence site</a>.
 */
\end{hide}
package umontreal.iro.lecuyer.stat;
\begin{hide}
import umontreal.iro.lecuyer.util.PrintfFormat;
import umontreal.iro.lecuyer.probdist.StudentDist;
import umontreal.iro.lecuyer.probdist.NormalDist;
import umontreal.iro.lecuyer.probdist.ChiSquareDist;
import java.util.logging.Level;
import java.util.logging.Logger;
\end{hide}

public class Tally extends StatProbe implements Cloneable\begin{hide} {
   // Number of observations accumulated by add() since the last init().
   private int numObs;
   // Reset to 0 by init(); not otherwise read or updated in this class.
   private double sumSquares;
   private double curAverage;  // The average of the first numObs observations
   private double curSum2;     // The sum (xi - average)^2 of the first numObs
                               // observations.
   // Logger used by average() and variance() to warn when they are called
   // with too few observations.
   private Logger log = Logger.getLogger ("umontreal.iro.lecuyer.stat");

   // Kinds of confidence interval that report() and shortReport() can append.
   private static enum CIType {CI_NONE, CI_NORMAL, CI_STUDENT};

   // Interval type currently selected for reports; changed by the
   // setConfidenceInterval*() methods.
   protected CIType confidenceInterval = CIType.CI_NONE;
   // Confidence level used by report() and shortReport(); see
   // setConfidenceLevel().
   protected double level = 0.95;
   // Number of fractional digits used by the no-argument report().
   protected int digits = 3;
\end{hide}
\end{code}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsubsection* {Constructors}
\begin{code}

   public Tally() \begin{hide} {
      super();
      // Start from an empty sample.  init() is overridable, so a subclass
      // version runs here, before that subclass's own constructor body.
      this.init();
   } \end{hide}
\end{code}
  \begin{tabb}  Constructs a new unnamed \texttt{Tally} statistical probe.
 \end{tabb}
\begin{code}

   public Tally (String name) \begin{hide} {
      super();
      // The name is stored first so that it is already set when init() runs.
      this.name = name;
      this.init();
   } \end{hide}
\end{code}
  \begin{tabb}  Constructs a new \texttt{Tally} statistical probe with
   name \texttt{name}.
 \end{tabb}
\begin{htmlonly}
   \param{name}{name of the tally}
\end{htmlonly}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsubsection* {Methods}
\begin{code}\begin{hide}

   public void init() {
      // Forget every observation: zero the counter and the running moments.
      numObs = 0;
      curAverage = 0.0;
      curSum2 = 0.0;
      sumValue = 0.0;
      sumSquares = 0.0;
      // Empty-sample sentinels: the first observation added afterwards
      // replaces both extremes (see the comparisons in add()).
      minValue = Double.POSITIVE_INFINITY;
      maxValue = Double.NEGATIVE_INFINITY;
   }\end{hide}

   public void add (double x) \begin{hide} {
      if (collect) {
         if (x < minValue) minValue = x;
         if (x > maxValue) maxValue = x;
         numObs++;
         // Keep the plain running sum current as well: init() resets
         // sumValue, but nothing in this class ever accumulated into it.
         // NOTE(review): presumably the inherited sum() reads sumValue --
         // confirm against StatProbe.
         sumValue += x;
         // One-pass update of the mean and of the sum of squared deviations.
         // Algorithm from Knuth, 3rd ed., p. 232; see Wikipedia:
         // http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#cite_note-1
         double y = x - curAverage;      // deviation from the previous mean
         curAverage += y / numObs;
         curSum2 += y*(x - curAverage);  // uses the already-updated mean
         // Kahan's compensated summation could be used here for better
         // precision.
         // (see http://en.wikipedia.org/wiki/Kahan_summation_algorithm)
      }
      // Called whether or not collect is set.
      notifyListeners (x);
   }\end{hide}
\end{code}
  \begin{tabb}  Gives a new observation \texttt{x} to the statistical collector.
   If broadcasting to observers is activated for this object,
   this method also transmits the new information to the
   registered observers by invoking the method
   \method{notifyListeners}{}.
 \end{tabb}
\begin{htmlonly}
   \param{x}{observation being added to this tally}
\end{htmlonly}
\begin{code}

   public int numberObs() \begin{hide} {
      // Counter incremented by add() and zeroed by init().
      return this.numObs;
   } \end{hide}
\end{code}
  \begin{tabb} Returns the number of observations given to this probe
   since its last initialization.
  \end{tabb}
\begin{htmlonly}
   \return{the number of collected observations}
\end{htmlonly}
\begin{code}

   public double average() \begin{hide} {
      if (numObs >= 1) {
         // Running mean maintained incrementally by add().
         return curAverage;
      }
      // Empty sample: log a warning and return NaN rather than throwing.
      final String message =
         "Tally " + name + ":   calling average() with " + numObs +
          " observation";
      log.logp (Level.WARNING, "Tally", "average", message);
      return Double.NaN;
   }\end{hide}
\end{code}
  \begin{tabb}  Returns the average value of the observations since the last
   initialization.
   This returns \texttt{Double.NaN}
   if the tally contains no observations.
 \end{tabb}
\begin{code}

   public double variance() \begin{hide} {
      if (numObs >= 2) {
         // Sample variance: sum of squared deviations over (n - 1).
         final int degreesOfFreedom = numObs - 1;
         return curSum2 / degreesOfFreedom;
      }
      // Fewer than two observations: log a warning and return NaN rather
      // than throwing.
      final String message =
         "Tally " + name + ":   calling variance() with " + numObs +
          " observation";
      log.logp (Level.WARNING, "Tally", "variance", message);
      return Double.NaN;
   }\end{hide}
\end{code}
  \begin{tabb}  Returns the sample variance of the observations since the last
   initialization.
   This returns \texttt{Double.NaN}
   if the tally contains fewer than two observations.
 \end{tabb}
\begin{htmlonly}
   \return{the variance of the observations}
\end{htmlonly}
\begin{code}

   public double standardDeviation() \begin{hide} {
      // variance() returns NaN (and logs a warning) with fewer than two
      // observations; the square root of NaN is NaN as well.
      final double sampleVariance = variance();
      return Math.sqrt (sampleVariance);
   }\end{hide}
\end{code}
  \begin{tabb}  Returns the sample standard deviation of the observations
   since the last initialization.
   This returns \texttt{Double.NaN}
   if the tally contains fewer than two observations.
 \end{tabb}
\begin{htmlonly}
   \return{the standard deviation of the observations}
\end{htmlonly}
\begin{code}

   public void confidenceIntervalNormal (double level,
                                         double[] centerAndRadius) \begin{hide} {
      // The result is written into the caller's array because two doubles
      // cannot be returned directly.
      double z;
      // Fixed: the message used to name confidenceIntervalStudent.
      if (numObs < 2) throw new RuntimeException (
          "Tally " + name +
          ": Calling confidenceIntervalNormal with < 2 Observations");
      // Center of the interval: the sample mean.
      centerAndRadius[0] =  average();
      // Two-sided interval: standard normal quantile of order (1 + level)/2.
      z = NormalDist.inverseF01 (0.5 * (level + 1.0));
      // Radius: quantile times the standard error sqrt(S^2 / n).
      centerAndRadius[1] = z * Math.sqrt (variance() / (double)numObs);
   }\end{hide}
\end{code}
  \begin{tabb} Computes a confidence interval on the mean.
  Returns, in elements 0 and 1 of the array
  object \texttt{centerAndRadius[]}, the center and half-length (radius)
  of a confidence interval on the true mean of the random variable $X$,
  with confidence level \texttt{level}, assuming that the $n$ observations
  given to this collector are independent and identically distributed
  (i.i.d.) copies of $X$, and that $n$ is large enough for the central limit
  theorem to hold.  This confidence interval is computed based on the statistic
  $$
    Z = {\bar X_n - \mu\latex{\over}\html{ / (}{S_{n,x}/\sqrt{n}}\html{)}}
  $$
  where $n$ is the number of observations given to this collector since its
  last initialization, $\bar X_n =$ \texttt{average()} is the average of these
  observations, $S_{n,x} =$ \texttt{standardDeviation()} is the empirical
  standard  deviation.  Under the assumption that the observations
  of $X$ are  i.i.d.{} and $n$ is large,
  $Z$ has the standard normal distribution.
  The confidence interval given by this method is valid \emph{only if}
  this assumption is approximately verified.
 \end{tabb}
\begin{htmlonly}
   \param{level}{desired probability that the (random) confidence
        interval covers the true mean (a constant)}
   \param{centerAndRadius}{array of size 2 in which are returned the center
        and radius of the confidence interval, respectively}
\end{htmlonly}
\begin{code}

   public void confidenceIntervalStudent (double level,
                                          double[] centerAndRadius) \begin{hide} {
      // The result is written into the caller's array because two doubles
      // cannot be returned directly.
      if (numObs < 2) {
         throw new RuntimeException (
             "Tally " + name +
             ": Calling confidenceIntervalStudent with < 2 Observations");
      }
      // Center of the interval: the sample mean.
      centerAndRadius[0] = average();
      // Two-sided interval: Student quantile of order (1 + level)/2 with
      // n - 1 degrees of freedom.
      final double quantile =
         StudentDist.inverseF (numObs - 1, 0.5 * (level + 1.0));
      // Standard error of the mean, sqrt(S^2 / n).
      final double standardError = Math.sqrt (variance() / numObs);
      centerAndRadius[1] = quantile * standardError;
   }\end{hide}
\end{code}
\begin{tabb} Computes a confidence interval on the mean.
  Returns, in elements 0 and 1 of the array
  object \texttt{centerAndRadius[]}, the center and half-length (radius)
  of a confidence interval on the true mean of the random variable $X$,
  with confidence level \texttt{level}, assuming that the observations
  given to this collector are independent and identically distributed
  (i.i.d.) copies of $X$, and that $X$ has the normal distribution.
  This confidence interval is computed based on the statistic
  $$
    T = {\bar X_n - \mu\latex{\over}\html{ / (}{S_{n,x}/\sqrt{n}}\html{)}}
  $$
  where $n$ is the number of observations given to this collector since its
  last initialization, $\bar X_n =$ \texttt{average()} is the average of these
  observations, $S_{n,x} =$ \texttt{standardDeviation()} is the empirical
  standard  deviation.  Under the assumption that the observations
  of $X$ are  i.i.d.{} and normally distributed,
  $T$ has the Student distribution with $n-1$ degrees of freedom.
  The confidence interval given by this method is valid \emph{only if}
  this assumption is approximately verified, or if $n$ is large enough
  so that $\bar X_n$ is approximately normally distributed.
\end{tabb}
\begin{htmlonly}
   \param{level}{desired probability that the (random) confidence
        interval covers the true mean (a constant)}
   \param{centerAndRadius}{array of size 2 in which are returned the center
        and radius of the confidence interval, respectively}
\end{htmlonly}
\begin{code}

   public String formatCINormal (double level, int d) \begin{hide} {
      PrintfFormat str = new PrintfFormat();
      // ci[0] = center, ci[1] = radius of the interval.
      double ci[] = new double[2];
      confidenceIntervalNormal (level, ci);
      str.append ("  " + (100*level) + "%");
      str.append (" confidence interval for mean (normal): (");
      // Fixed: the last two arguments were passed as (d-1, d), unlike
      // formatCIStudent, formatCIVarianceChi2 and report, which all pass
      // (d, d-1).  They are now (d, d-1) here too, so that d fractional
      // digits are printed as documented.
      str.append (7 + d, d, d-1, ci[0] - ci[1]).append (',');
      str.append (7 + d, d, d-1, ci[0] + ci[1]).append (" )" + PrintfFormat.NEWLINE);
      return str.toString();
   }\end{hide}
\end{code}
\begin{tabb}  Similar to \method{confidenceIntervalNormal}{}.
   Returns the confidence interval in a formatted string of the form \\
\centerline{``\texttt{95\% confidence interval for mean (normal): (32.431,\, 32.487)}'',}
   using $d$ fractional decimal digits.
\end{tabb}
\begin{htmlonly}
   \param{level}{desired probability that the (random) confidence
        interval covers the true mean (a constant)}
   \param{d}{number of fractional decimal digits}
   \return{a confidence interval formatted as a string}
\end{htmlonly}
\begin{code}

   public String formatCINormal (double level) \begin{hide} {
      // Same as the two-argument version with 3 fractional digits.
      final int defaultDigits = 3;
      return formatCINormal (level, defaultDigits);
   }\end{hide}
\end{code}
\begin{tabb}   Equivalent to \texttt{formatCINormal (level, 3)}.
\end{tabb}
\begin{htmlonly}
   \param{level}{desired probability that the (random) confidence
        interval covers the true mean (a constant)}
   \return{a confidence interval formatted as a string}
\end{htmlonly}
\begin{code}

   public String formatCIStudent (double level, int d) \begin{hide} {
      // Compute the interval first; this throws if there are fewer than
      // two observations.
      final double[] centerAndRadius = new double[2];
      confidenceIntervalStudent (level, centerAndRadius);
      final double lower = centerAndRadius[0] - centerAndRadius[1];
      final double upper = centerAndRadius[0] + centerAndRadius[1];

      // Render it as "  <level>% confidence interval for mean (student): (lo,hi )".
      final int width = 7 + d;
      PrintfFormat out = new PrintfFormat();
      out.append ("  " + (100*level) + "%")
         .append (" confidence interval for mean (student): (");
      out.append (width, d, d-1, lower).append (',');
      out.append (width, d, d-1, upper).append (" )" + PrintfFormat.NEWLINE);
      return out.toString();
   }\end{hide}
\end{code}
  \begin{tabb}  Similar to \method{confidenceIntervalStudent}{}.
   Returns the confidence interval in a formatted string of the form\\
  \centerline{``\texttt{95\% confidence interval for mean (student): (32.431,\, 32.487)}'',}
   using $d$ fractional decimal digits.
\end{tabb}
\begin{htmlonly}
   \param{level}{desired probability that the (random) confidence
        interval covers the true mean (a constant)}
   \param{d}{number of fractional decimal digits}
   \return{a confidence interval formatted as a string}
\end{htmlonly}
\begin{code}

   public String formatCIStudent (double level) \begin{hide} {
      // Same as the two-argument version with 3 fractional digits.
      final int defaultDigits = 3;
      return formatCIStudent (level, defaultDigits);
   }\end{hide}
\end{code}
\begin{tabb}   Equivalent to \texttt{formatCIStudent (level, 3)}.
\end{tabb}
\begin{htmlonly}
   \param{level}{desired probability that the (random) confidence
        interval covers the true mean (a constant)}
   \return{a confidence interval formatted as a string}
\end{htmlonly}
\begin{code}

   public void confidenceIntervalVarianceChi2 (double level,
                                               double[] interval) \begin{hide} {
      // The result is written into the caller's array because two doubles
      // cannot be returned directly.
      if (numObs < 2) {
         throw new RuntimeException (
             "Tally " + name +
             ":   calling confidenceIntervalVarianceChi2 with < 2 observations");
      }
      final int degreesOfFreedom = numObs - 1;
      // (n - 1) * S^2, the numerator shared by both bounds.
      final double scaledVariance = degreesOfFreedom * variance();
      // Chi-square quantiles of orders (1 + level)/2 and (1 - level)/2.
      final double upperQuantile =
         ChiSquareDist.inverseF (degreesOfFreedom, 0.5 * (1.0 + level));
      final double lowerQuantile =
         ChiSquareDist.inverseF (degreesOfFreedom, 0.5 * (1.0 - level));
      // Dividing by the larger quantile gives the left boundary.
      interval[0] = scaledVariance / upperQuantile;
      interval[1] = scaledVariance / lowerQuantile;
   }\end{hide}
\end{code}
\begin{tabb} Computes a confidence interval on the variance.
  Returns, in elements 0 and 1 of array \texttt{interval}, the
  left and right boundaries $[I_1,I_2]$ of a confidence interval on the true
 variance $\sigma^2$ of the random variable $X$, with confidence level
  \texttt{level}, assuming that the observations
  given to this collector are independent and identically distributed
  (i.i.d.) copies of $X$, and that $X$ has the normal distribution.
  This confidence interval is computed based on the statistic
  $    \chi^2_{n-1} = (n-1)S^2_{n}/\sigma^2  $
  where $n$ is the number of observations given to this collector since its
  last initialization, and $S^2_n =$ \texttt{variance()} is the empirical
  variance of these observations.  Under the assumption that the observations
  of $X$ are  i.i.d.{} and normally distributed,
  $\chi^2_{n-1}$ has the chi-square distribution with $n-1$ degrees of freedom.
  Given the \texttt{level} $ = 1 - \alpha$,
  one has $P[\chi^2_{n-1} < x_1] = P[\chi^2_{n-1} > x_2] = \alpha/2$
  and $[I_1,I_2] = [(n-1)S^2_{n}/x_2,\; (n-1)S^2_{n}/x_1]$.
%  The confidence interval given by this method is valid \emph{only if}
%   this assumption is approximately verified, or if $n$ is large enough
%   so that $\bar X_n$ is approximately normally distributed.
\end{tabb}
\begin{htmlonly}
   \param{level}{desired probability that the (random) confidence
        interval covers the true variance (a constant)}
   \param{interval}{array of size 2 in which are returned the left
        and right boundaries of the confidence interval, respectively}
\end{htmlonly}
\begin{code}

   public String formatCIVarianceChi2 (double level, int d) \begin{hide} {
      // Compute the interval first; this throws if there are fewer than
      // two observations.  bounds[0] is the left end, bounds[1] the right.
      final double[] bounds = new double[2];
      confidenceIntervalVarianceChi2 (level, bounds);

      final int width = 7 + d;
      PrintfFormat out = new PrintfFormat();
      out.append ("  " + (100*level) + "%")
         .append (" confidence interval for variance (chi2): (");
      out.append (width, d, d-1, bounds[0]).append (',');
      out.append (width, d, d-1, bounds[1]).append (" )" + PrintfFormat.NEWLINE);
      return out.toString();
   }\end{hide}
\end{code}
\begin{tabb}  Similar to \method{confidenceIntervalVarianceChi2}{}.
   Returns the confidence interval in a formatted string of the form \\
\centerline{``\texttt{95.0\% confidence interval for variance (chi2):
   ( 510.642,  519.673 )}'',}
   using $d$ fractional decimal digits.
\end{tabb}
\begin{htmlonly}
   \param{level}{desired probability that the (random) confidence
        interval covers the true variance}
   \param{d}{number of fractional decimal digits}
   \return{a confidence interval formatted as a string}
\end{htmlonly}
\begin{code}

   public String report() \begin{hide} {
      // Use the confidence level and digit count stored in this probe.
      return report (this.level, this.digits);
   }\end{hide}
\end{code}
 \begin{tabb}  Returns a formatted string that contains a report on this probe.
 \end{tabb}
\begin{htmlonly}
   \return{a statistical report formatted as a string}
\end{htmlonly}
\begin{code}

   public String report (double level, int d) \begin{hide} {
      // Column titles for the single summary row printed below them.
      final String columnTitles = PrintfFormat.NEWLINE
         + "    num. obs.      min          max        average     standard dev."
         + PrintfFormat.NEWLINE;
      final int width = 9 + d;

      PrintfFormat out = new PrintfFormat();
      out.append ("REPORT on Tally stat. collector ==> " + name);
      out.append (columnTitles);
      out.append (7 + d, numObs);
      out.append (" ");
      out.append (width, d, d-1, (double)minValue).append (" ");
      out.append (width, d, d-1, (double)maxValue).append (" ");
      out.append (width, d, d-1, average()).append (" ");
      out.append (width, d, d-1, standardDeviation());
      out.append (PrintfFormat.NEWLINE);

      // Optionally add the confidence interval selected for this probe.
      // The formatCI* methods throw if there are fewer than two observations.
      switch (confidenceInterval) {
         case CI_NORMAL:
            out.append (formatCINormal (level, d));
            break;
         case CI_STUDENT:
            out.append (formatCIStudent (level, d));
            break;
         default:
            // CI_NONE: nothing to add.
            break;
      }

      return out.toString();
   }\end{hide}
\end{code}
 \begin{tabb}  Returns a formatted string that contains a report on this probe
  with a confidence interval level \texttt{level} using $d$
  fractional decimal digits.
 \end{tabb}
\begin{htmlonly}
   \param{level}{desired probability that the confidence
                 interval covers the true mean}
   \param{d}{number of fractional decimal digits}
   \return{a statistical report formatted as a string}
\end{htmlonly}
\begin{code}\begin{hide}

   public String shortReportHeader() {
      // Three-space gap placed between the columns.
      final String gap = "   ";
      PrintfFormat header = new PrintfFormat();
      if (showNobs) {
         header.append (-8, "num obs.").append ("  ");
      }
      header.append (-8, "   min").append (gap);
      header.append (-8, "   max").append (gap);
      header.append (-8, "   average").append (gap);
      header.append (-8, "   std. dev.");
      // The interval column is present only when an interval type is selected.
      if (confidenceInterval != CIType.CI_NONE) {
         header.append (gap).append (-12, "conf. int.");
      }
      return header.toString();
   }\end{hide}

   public String shortReport()\begin{hide} {
      // Three-space gap placed between the columns.
      final String gap = "   ";
      PrintfFormat line = new PrintfFormat();
      if (showNobs) {
         line.append (-8, numberObs());
      }
      line.append (9, 3, 2, min()).append (gap);
      line.append (9, 3, 2, max()).append (gap);
      line.append (10, 3, 2, average()).append (gap);
      // The standard deviation needs at least two observations; print a
      // placeholder otherwise.
      if (numberObs() < 2) {
         line.append (11, "---");
      } else {
         line.append (11, 3, 2, standardDeviation());
      }

      // No interval requested: the line is complete.
      if (confidenceInterval == CIType.CI_NONE) {
         return line.toString();
      }

      // Note: both interval methods throw a RuntimeException when there are
      // fewer than two observations.
      final double[] centerAndRadius = new double[2];
      switch (confidenceInterval) {
      case CI_NORMAL:
         confidenceIntervalNormal (level, centerAndRadius);
         break;
      case CI_STUDENT:
         confidenceIntervalStudent (level, centerAndRadius);
         break;
      default:
         break;
      }
      line.append (gap).append ((100*level) + "% (");
      line.append (9, 3, 2, centerAndRadius[0] - centerAndRadius[1]).append (',');
      line.append (9, 3, 2, centerAndRadius[0] + centerAndRadius[1]).append (")");

      return line.toString();
   }\end{hide}
\end{code}
\begin{tabb}   Formats and returns a short
 statistical report for this tally.
 The returned single-line report contains the minimum value,
 the maximum value,
 the average, and the standard deviation, in that order, separated
 by three spaces.  If the number of observations is shown in the short report,
 a column containing the
 number of observations in this tally is added.
\end{tabb}
\begin{htmlonly}
   \return{the string containing the report.}
\end{htmlonly}
\begin{code}

    public String reportAndCIStudent (double level, int d) \begin{hide} {
      // Temporarily select the Student interval so that report() appends it;
      // the finally clause restores the previous selection even if report()
      // throws.
      final CIType savedType = confidenceInterval;
      confidenceInterval = CIType.CI_STUDENT;
      try {
         return report (level, d);
      } finally {
         confidenceInterval = savedType;
      }
  }\end{hide}
\end{code}
\begin{tabb} Returns a formatted string that contains a report on this probe (as in
    \method{report}{}), followed by a confidence interval (as in
    \method{formatCIStudent}{}), using $d$ fractional decimal digits.
\end{tabb}
\begin{htmlonly}
   \param{level}{desired probability that the (random) confidence
        interval covers the true mean (a constant)}
   \param{d}{number of fractional decimal digits}
   \return{a statistical report with a confidence interval, formatted as a string}
\end{htmlonly}
\begin{code}

   public String reportAndCIStudent (double level) \begin{hide} {
      // Same as the two-argument version with 3 fractional digits.
      final int defaultDigits = 3;
      return reportAndCIStudent (level, defaultDigits);
  }\end{hide}
\end{code}
\begin{tabb} Same as \method{reportAndCIStudent}{} \texttt{(level, 3)}.
\end{tabb}
\begin{htmlonly}
   \param{level}{desired probability that the (random) confidence
        interval covers the true mean (a constant)}
   \return{a statistical report with a confidence interval, formatted as a string}
\end{htmlonly}
\begin{code}

   public double getConfidenceLevel()\begin{hide} {
      // Level used by report() and shortReport(); 0.95 unless changed.
      return this.level;
   }\end{hide}
\end{code}
\begin{tabb}
   Returns the level of confidence for the intervals on the mean displayed in
   reports.  The default confidence level is 0.95.
\end{tabb}
\begin{htmlonly}
   \return{desired probability that the (random) confidence
        interval covers the true mean (a constant)}
\end{htmlonly}
\begin{code}

   public void setConfidenceLevel (double level)\begin{hide} {
      // NaN fails both range comparisons below, so it used to be stored
      // silently; reject it explicitly.
      if (Double.isNaN (level))
         throw new IllegalArgumentException("level is NaN");
      // Accepted range is [0, 1).
      if (level < 0.0)
         throw new IllegalArgumentException("level < 0");
      if (level >= 1.0)
         throw new IllegalArgumentException("level >= 1");
      this.level = level;
   }\end{hide}
\end{code}
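\begin{tabb}
   Sets the level of confidence for the intervals on the mean displayed in
   reports.  The level must satisfy $0 \le$ \texttt{level} $< 1$; otherwise
   an \texttt{IllegalArgumentException} is thrown.
\end{tabb}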
\begin{htmlonly}
   \param{level}{desired probability that the (random) confidence
        interval covers the true mean (a constant)}
\end{htmlonly}
\begin{code}

   public void setConfidenceIntervalNone()\begin{hide} {
      // report() and shortReport() add no interval for this setting.
      this.confidenceInterval = CIType.CI_NONE;
   }\end{hide}
\end{code}
\begin{tabb}  Indicates that no confidence interval needs to be printed in
   reports formatted by \method{report}{} and \method{shortReport}{}.
   This restores the default behavior of the reporting system.
\end{tabb}
\begin{code}

   public void setConfidenceIntervalNormal()\begin{hide} {
      // report() and shortReport() add the normal-based interval.
      this.confidenceInterval = CIType.CI_NORMAL;
   }\end{hide}
\end{code}
\begin{tabb}  Indicates that a confidence interval on the true mean, based on the
  central limit theorem, needs to be included in reports formatted by
 \method{report}{} and \method{shortReport}{}. The confidence interval is
 formatted using \method{formatCINormal}{}.
\end{tabb}
\begin{code}

   public void setConfidenceIntervalStudent()\begin{hide} {
      // report() and shortReport() add the Student-based interval.
      this.confidenceInterval = CIType.CI_STUDENT;
   }\end{hide}
\end{code}
\begin{tabb}  Indicates that a confidence interval on the true mean, based on the
  normality assumption, needs to be included in
   reports formatted by \method{report}{} and \method{shortReport}{}.
   The confidence interval is formatted using \method{formatCIStudent}{}.
\end{tabb}
\begin{code}

   public void setShowNumberObs (boolean showNumObs)\begin{hide} {
      // Flag read by shortReportHeader() and shortReport().
      this.showNobs = showNumObs;
   }\end{hide}
\end{code}
\begin{tabb}
   Determines if the number of observations must be displayed in reports.
  By default, the number of observations is displayed.
\end{tabb}
\begin{htmlonly}
   \param{showNumObs}{the value of the indicator.}
\end{htmlonly}
\begin{code}

   public Tally clone()\begin{hide} {
      try {
         return (Tally)super.clone();
      } catch (CloneNotSupportedException e) {
         // Keep the original exception as the cause so that the failure
         // stays diagnosable; it used to be dropped.
         throw new IllegalStateException ("Tally can't clone", e);
      }
   }\end{hide}
\end{code}
\begin{tabb} Clones this object.
\end{tabb}
\begin{code}\begin{hide}
}\end{hide}
\end{code}
